In [1]:
%matplotlib inline
import matplotlib
import numpy as np
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans

Disjoint clusters

Generate random data


In [2]:
# Seed NumPy's global generator so every run draws the same points.
np.random.seed(1)
N = 100  # number of samples drawn for each cluster
k = 4    # cluster count (defined here; not referenced inside this cell)

# Shared isotropic covariance: variance 0.1 on each axis, no correlation.
cov_mat = np.identity(2) / 10

# One Gaussian blob per corner of the square [-1, 1] x [-1, 1]. The blobs are
# drawn in this exact order because they all consume the same seeded stream.
cluster_means = [(-1, -1), (-1, +1), (+1, -1), (+1, +1)]
all_samples1 = np.concatenate(
    [np.random.multivariate_normal(mean, cov_mat, N) for mean in cluster_means],
    axis=0,
)

# Index of the generating blob for every row of all_samples1 (N zeros, then
# N ones, ...); used as the colour value when plotting.
cols1 = [blob_idx for blob_idx in range(len(cluster_means)) for _ in range(N)]

Plot the data with the original cluster colors


In [3]:
# Scatter the samples, coloured by the blob each point was generated from.
fig, ax = plt.subplots(figsize=(5, 5))
# Transposing yields the x row and the y row, unpacked as the two coordinates.
ax.scatter(*all_samples1.T, s=40, c=cols1, alpha=1, linewidth=0)
plt.show()


Performing K-means with K=4


In [4]:
# Fit K-means on the blob data. The cluster count comes from `k` (set to 4 in
# the data-generation cell) so it is defined in one place rather than repeated
# as a literal. `n_init` is pinned explicitly because scikit-learn changed its
# default number of restarts between releases; leaving it implicit makes the
# result (and a FutureWarning) depend on the installed version.
kmeans_model1 = KMeans(n_clusters=k, n_init=10, random_state=1).fit(all_samples1)
# Cluster index assigned by the fitted model to each row of all_samples1.
labels1 = kmeans_model1.labels_

Scatter plot of the clustering results


In [5]:
# Read the per-sample cluster assignments from the fitted model.
labels1 = kmeans_model1.labels_

# Same scatter as for the raw data, but coloured by the K-means assignment.
fig, ax = plt.subplots(figsize=(5, 5))
xs, ys = all_samples1[:, 0], all_samples1[:, 1]
ax.scatter(xs, ys, s=40, c=labels1, alpha=1, linewidth=0)
plt.show()


Example with two concentric rings


In [6]:
# Re-seed so this example draws the same points regardless of earlier cells.
np.random.seed(1)
N = 200  # number of points on each ring

# One polar angle per point (both rings together), scaled to a full turn.
theta = np.random.uniform(size=(2 * N, 1)) * 2 * np.pi

# Radii: the first N rows lie in the inner band 0.8..1.2, the last N rows in
# the outer band 1.8..2.2. The inner band is sampled first so the order in
# which the seeded stream is consumed stays fixed.
inner_r = np.random.uniform(low=0.8, high=1.2, size=(N, 1))
outer_r = np.random.uniform(low=1.8, high=2.2, size=(N, 1))
r = np.concatenate([inner_r, outer_r], axis=0)

# Polar -> Cartesian coordinates, each a (2N, 1) column.
x = r * np.cos(theta)
y = r * np.sin(theta)

# Stack the two columns side by side into a (2N, 2) sample matrix.
all_samples2 = np.concatenate([x, y], axis=1)

# One RGB triple per sample: first colour for the inner ring rows, second
# colour for the outer ring rows.
ring_rgb = np.array([(1.0, 0.2, 0.2), (0.5, 0.5, 1)])
cols2 = np.repeat(ring_rgb, N, axis=0)

In [7]:
# Scatter the ring samples using the per-row RGB colours in cols2.
fig, ax = plt.subplots(figsize=(5, 5))
ring_x = all_samples2[:, 0]
ring_y = all_samples2[:, 1]
ax.scatter(ring_x, ring_y, s=40, c=cols2, alpha=1, linewidth=0)
plt.show()


Performing K-means with K=2


In [8]:
# Fit K-means with two clusters on the concentric-ring data. `n_init` is
# pinned explicitly because scikit-learn changed its default number of
# restarts between releases; leaving it implicit makes the result (and a
# FutureWarning) depend on the installed version.
kmeans_model2 = KMeans(n_clusters=2, n_init=10, random_state=1).fit(all_samples2)
# Cluster index assigned by the fitted model to each row of all_samples2.
labels2 = kmeans_model2.labels_

Scatter plot of the clustering results


In [9]:
# Scatter the ring samples again, this time coloured by the K-means labels.
fig, ax = plt.subplots(figsize=(5, 5))
# Transposing yields the x row and the y row, unpacked as the two coordinates.
ax.scatter(*all_samples2.T, s=40, c=labels2, alpha=1, linewidth=0)
plt.show()